#always
import pandas as pd
import geopandas as gpd
import plotly.express as px
from sodapy import Socrata
import contextily as ctx
import matplotlib.pyplot as plt
#always 2
import requests
from pandas.io.json import json_normalize
from geopandas.tools import sjoin
import re as re
#data viz
import seaborn as sns
#explore point patterns
from pointpats import centrography
from matplotlib.patches import Ellipse
import numpy
#Creating a socrata client
# connect to the data portal
client = Socrata("data.lacity.org", None)
results = client.get("vt5t-mscf",
where = "Locarea IN ('WILSHIRE BL', 'LINCOLN BL', '7TH ST (SP)', '7TH ST /W (SP)', '7TH ST /W', '7TH ST /E', '4TH ST', '4TH ST /W (SP)', '4TH ST /W', '4TH ST /E', '5TH ST /W (SP)', '5TH ST /W', '5TH ST /E')",
limit = 5010)
# Convert to pandas DataFrame
df = pd.DataFrame.from_records(results)
# check it out
df.sample(2)
WARNING:root:Requests made without an app_token will be subject to strict throttling limits.
| trees_id | trees_id2 | locarea | locunit | locsubunit | locsite | spp | common | botanical | on_strt | ... | location_1 | tg_pg | gisno | x_original | y_original | des_spp1 | grow | un | blsd | blsdval | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3058 | 120629 | 384592f4185 | WILSHIRE BL | 10101 | S | 14 | 96 | CALIFORNIA SYCAMORE | PLATANUS RACEMOSA | COMSTOCK AV | ... | {'latitude': '-118.42', 'longitude': '34.0676'... | 632 | 328634 | 0 | 0 | 0 | 99 | 44 | 178 | 178 |
| 2361 | 518117 | 37a0992b283 | LINCOLN BL | 8040 | F | 7 | 756 | LONDON PLANE | Platanus acerifolia | LINCOLN BL | ... | {'latitude': '-118.424', 'longitude': '33.965'... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 73 columns
df['x'] = df.x.astype(float)
df['y'] = df.y.astype(float)
#select needed columns
df = df[['trees_id','cd','common','botanical','dbh','x','y']]
df = df.rename(columns={'x': 'longitude', 'y': 'latitude'})
df.isna().any()
trees_id False cd True common False botanical False dbh False longitude True latitude True dtype: bool
df = df.dropna(subset=['latitude', 'longitude', 'common'])
#remove unmapped rows
df = df[df['latitude'] != 0]
#remove irrelevant rows
df = df[~df['common'].str.contains('NULL|OTHER|STUMP|VACANT')]
#remove NULL from df.dbh for later
df = df[~df['dbh'].str.contains('NULL')]
#get rid of palms, not even real trees
xdf = df[~df['common'].str.contains('PALM')]
#convert to gpd
shade = gpd.GeoDataFrame(xdf,
crs="EPSG:4326",
geometry=gpd.points_from_xy(x=xdf.longitude, y=xdf.latitude))
#all trees and tree-like plants
trees = gpd.GeoDataFrame(df,
crs="EPSG:4326",
geometry=gpd.points_from_xy(x=df.longitude, y=df.latitude))
shade.shape
(1793, 8)
shade.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f07b39dabb0>
shade.head(2)
| trees_id | cd | common | botanical | dbh | longitude | latitude | geometry | |
|---|---|---|---|---|---|---|---|---|
| 2261 | 425276 | 6 | CAMPHOR TREE | Cinnamomum camphora | 0 - 3" | -118.458 | 33.9966 | POINT (-118.45800 33.99660) |
| 2270 | 433530 | 6 | EVERGREEN PEAR | Pyrus kawakamii | 6 - 12" | -118.449 | 33.9923 | POINT (-118.44900 33.99230) |
Now we have 2 df, the one with all the shade trees and the one with all the trees, which will be used to compare most popular trees later
url = "https://api.metro.net/agencies/lametro/routes/20/stops/"
stops = pd.read_json(url)
stops = pd.DataFrame(stops['items'].tolist())
stops = stops.sort_values(by='display_name')
stops.head(2)
| id | display_name | latitude | longitude | |
|---|---|---|---|---|
| 14 | 03179 | 4th / Arizona | 34.01792 | -118.49701 |
| 164 | 00038 | 5th / Colorado | 34.01388 | -118.49020 |
stops['unique_name'] = stops['display_name']+" ("+stops['id']+")"
stops.head(2)
| id | display_name | latitude | longitude | unique_name | |
|---|---|---|---|---|---|
| 14 | 03179 | 4th / Arizona | 34.01792 | -118.49701 | 4th / Arizona (03179) |
| 164 | 00038 | 5th / Colorado | 34.01388 | -118.49020 | 5th / Colorado (00038) |
stops = gpd.GeoDataFrame(stops,
crs="EPSG:4326",
geometry=gpd.points_from_xy(x=stops.longitude, y=stops.latitude))
stops.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f07b37eeb20>
# reproject the data
stops = stops.to_crs('EPSG:3857')
shade = shade.to_crs('EPSG:3857')
trees = trees.to_crs('EPSG:3857')
# use subplots that make it easier to create multiple layered maps
fig, ax = plt.subplots(figsize=(15, 15))
stops.plot(ax=ax,
legend=True,
alpha=0.5,
color='red')
shade.plot(ax=ax,
color='green',
marker = '+',
alpha=0.5,
legend=True)
ax.axis('off')
ax.legend()
ax.set_title('Bus stops and trees',fontsize=22)
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
WARNING:matplotlib.legend:No handles with labels found to put in legend.
# make a copy of stops first
stop_buffer = stops
# assign its geometry column to be the buffer
stop_buffer['geometry'] = stops.buffer(100)
stop_buffer.plot(figsize=(12,12),alpha=0.5)
<matplotlib.axes._subplots.AxesSubplot at 0x7f07b37c2f10>
stop_buffer.shape
(183, 6)
stops.shape
(183, 6)
shade.shape
(1793, 8)
trees.shape
(2123, 8)
stop_buffer.head()
| id | display_name | latitude | longitude | unique_name | geometry | |
|---|---|---|---|---|---|---|
| 14 | 03179 | 4th / Arizona | 34.01792 | -118.49701 | 4th / Arizona (03179) | POLYGON ((-13190926.814 4031208.498, -13190927... |
| 164 | 00038 | 5th / Colorado | 34.01388 | -118.49020 | 5th / Colorado (00038) | POLYGON ((-13190168.728 4030665.910, -13190169... |
| 80 | 03033 | 5th / Colorado | 34.01474 | -118.49117 | 5th / Colorado (03033) | POLYGON ((-13190276.697 4030781.406, -13190277... |
| 161 | 03120 | 5th / Santa Monica | 34.01755 | -118.49449 | 5th / Santa Monica (03120) | POLYGON ((-13190646.289 4031158.806, -13190646... |
| 68 | 104710 | 7th / Hill | 34.04594 | -118.25425 | 7th / Hill (104710) | POLYGON ((-13163902.894 4034972.315, -13163903... |
# join trees to buffers
#we want each bus stop buffer to be a row, so take data from right column to newgdf column
njoin = gpd.sjoin(shade, stop_buffer, how='right')
pjoin = gpd.sjoin(trees, stop_buffer, how='right')
njoin.shape
(1821, 14)
njoin.head(2)
| index_left | trees_id | cd | common | botanical | dbh | longitude_x | latitude_x | id | display_name | latitude_y | longitude_y | unique_name | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5 | 2784.0 | 7588 | 5 | RED GUM | EUCALYPTUS CAMALDULENSIS | 36-42 | -118.425 | 34.0680 | 08392 | Wilshire / Comstock | 34.06785 | -118.42553 | Wilshire / Comstock (08392) | POLYGON ((-13182969.685 4037916.266, -13182970... |
| 5 | 2804.0 | 25928 | 5 | CARROTWOOD | Cupaniopsis anacardioides | 3-12 | -118.425 | 34.0679 | 08392 | Wilshire / Comstock | 34.06785 | -118.42553 | Wilshire / Comstock (08392) | POLYGON ((-13182969.685 4037916.266, -13182970... |
#i wanted to compare shade trees to all trees on this route, post-buffer
pdf = pjoin #all trees include palms
xdf = njoin #shade trees Xcluding palms
Next, I'll compare top ten shade trees to top ten of all trees
type_counts = xdf.common.value_counts()
type_counts = type_counts.reset_index()
type_counts.columns = ['common_name','count']
topshade = type_counts[type_counts["count"] > 12]
topshade
| common_name | count | |
|---|---|---|
| 0 | INDIAN LAUREL FIG | 787 |
| 1 | JACARANDA | 235 |
| 2 | SOUTHERN MAGNOLIA | 194 |
| 3 | CAMPHOR TREE | 112 |
| 4 | CRAPE MYRTLE | 94 |
| 5 | LONDON PLANE | 78 |
| 6 | PURPLE-LEAF PLUM | 55 |
| 7 | THORNLESS HONEY LOCUST | 36 |
| 8 | SILK-FLOSS TREE | 25 |
| 9 | QUEENSLAND PITTOSPORUM | 19 |
palm_type_counts = pdf.common.value_counts()
palm_type_counts = palm_type_counts.reset_index()
palm_type_counts.columns = ['common_name','count']
toptrees = palm_type_counts[palm_type_counts["count"] > 35]
toptrees
| common_name | count | |
|---|---|---|
| 0 | INDIAN LAUREL FIG | 787 |
| 1 | JACARANDA | 235 |
| 2 | MEXICAN FAN PALM | 212 |
| 3 | SOUTHERN MAGNOLIA | 194 |
| 4 | CAMPHOR TREE | 112 |
| 5 | CRAPE MYRTLE | 94 |
| 6 | LONDON PLANE | 78 |
| 7 | PURPLE-LEAF PLUM | 55 |
| 8 | THORNLESS HONEY LOCUST | 36 |
| 9 | DATE PALM | 36 |
fig = px.bar(topshade,
x='common_name',
y='count',
color='common_name',
title='Most Popular Shade Trees',
labels={'common_name':'Species', 'count':'Counts'}
)
fig.update(layout_showlegend=False)
fig.write_html("pop_shade_trees.html")
fig = px.bar(toptrees,
x='common_name',
y='count',
color='common_name',
title='Most Popular Trees',
labels={'common_name':'Species', 'count':'Counts'}
)
fig.update(layout_showlegend=False)
fig.write_html("pop_trees.html")
I want to separate dbh so that I get the highest number in the range into its own column
arr = xdf['dbh'].str.split(r'[-|>|+]', expand = True)
arr = arr.replace(r'^\s*$', '36',regex=True)
xdf[['dbh1', 'dbh2']] = arr
xdf.head(2)
| index_left | trees_id | cd | common | botanical | dbh | longitude_x | latitude_x | id | display_name | latitude_y | longitude_y | unique_name | geometry | dbh1 | dbh2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5 | 2784.0 | 7588 | 5 | RED GUM | EUCALYPTUS CAMALDULENSIS | 36-42 | -118.425 | 34.0680 | 08392 | Wilshire / Comstock | 34.06785 | -118.42553 | Wilshire / Comstock (08392) | POLYGON ((-13182969.685 4037916.266, -13182970... | 36 | 42 |
| 5 | 2804.0 | 25928 | 5 | CARROTWOOD | Cupaniopsis anacardioides | 3-12 | -118.425 | 34.0679 | 08392 | Wilshire / Comstock | 34.06785 | -118.42553 | Wilshire / Comstock (08392) | POLYGON ((-13182969.685 4037916.266, -13182970... | 3 | 12 |
#remove the inches symbol from dbh2
xdf['dbh2'] = xdf['dbh2'].str.replace(r'\D', '')
xdf =xdf.sort_values(by='dbh2',ascending=False)
diameter_counts = xdf.dbh2.value_counts()
diameter_counts = diameter_counts.reset_index()
diameter_counts.columns = ['diameter','count']
#diameter_counts=diameter_counts.sort_values(by='diameter',ascending=True)
diameter_counts
| diameter | count | |
|---|---|---|
| 0 | 12 | 553 |
| 1 | 18 | 410 |
| 2 | 3 | 385 |
| 3 | 24 | 192 |
| 4 | 6 | 140 |
| 5 | 36 | 33 |
| 6 | 30 | 24 |
| 7 | 42 | 4 |
dbh = diameter_counts.astype('int64').sort_values(by='diameter',ascending=True)
dbh
| diameter | count | |
|---|---|---|
| 2 | 3 | 385 |
| 4 | 6 | 140 |
| 0 | 12 | 553 |
| 1 | 18 | 410 |
| 3 | 24 | 192 |
| 6 | 30 | 24 |
| 5 | 36 | 33 |
| 7 | 42 | 4 |
pig = px.bar(dbh,
x='diameter',
y='count',
color='diameter',
title='Shade Trees by Age & Canopy',
labels={'diameter':'DBH', 'count':'Counts'}
)
pig.show()
pig.write_html("dbh_bar_graph.html")
# new dataframe for count per stop
trees_by_stop = njoin.groupby(['id']).size().reset_index(name='treecount').sort_values(by='treecount')
trees_by_stop.sample(2)
| id | treecount | |
|---|---|---|
| 87 | 08468 | 18 |
| 133 | 16710 | 23 |
# join the count back to the buffers
stop_buffer=stop_buffer.merge(trees_by_stop,on='id')
stop_buffer.head(5)
| id | display_name | latitude | longitude | unique_name | geometry | treecount | |
|---|---|---|---|---|---|---|---|
| 0 | 03179 | 4th / Arizona | 34.01792 | -118.49701 | 4th / Arizona (03179) | POLYGON ((-13190926.814 4031208.498, -13190927... | 1 |
| 1 | 00038 | 5th / Colorado | 34.01388 | -118.49020 | 5th / Colorado (00038) | POLYGON ((-13190168.728 4030665.910, -13190169... | 1 |
| 2 | 03033 | 5th / Colorado | 34.01474 | -118.49117 | 5th / Colorado (03033) | POLYGON ((-13190276.697 4030781.406, -13190277... | 1 |
| 3 | 03120 | 5th / Santa Monica | 34.01755 | -118.49449 | 5th / Santa Monica (03120) | POLYGON ((-13190646.289 4031158.806, -13190646... | 1 |
| 4 | 104710 | 7th / Hill | 34.04594 | -118.25425 | 7th / Hill (104710) | POLYGON ((-13163902.894 4034972.315, -13163903... | 1 |
stop_buffer.mean()
id inf latitude 34.05466 longitude -118.37209 treecount 10.00000 dtype: float64
mean_treecount = round(stop_buffer['treecount'].mean(), 1)
mean_treecount
10.0
median_treecount = round(stop_buffer['treecount'].median(), 1)
median_treecount
5.0
mode_treecount = round(stop_buffer['treecount'].mode(), 1)
mode_treecount
0 1 dtype: int64
stop_buffer['treecount'].plot(kind='hist', figsize=(10, 8), linewidth=2, color='seagreen', edgecolor='gray')
plt.xlabel("Number of trees", labelpad=15)
plt.ylabel("Frequency", labelpad=15)
plt.title("Number of Trees within 100 m buffer of bus stops along Route 20", y=1.012, fontsize=20)
measurements = [median_treecount, mean_treecount]
names=["median", "mean"]
colors = ['blue', 'orange']
for measurement, name, color in zip(measurements, names, colors):
plt.axvline(x=measurement, linestyle='--', linewidth=2.5, label='{0} at {1}'.format(name, measurement), c=color)
plt.legend();
#XXX.write_html("Tree_Stops_Stats.html")
stop_buffer.median()
id 9116.500000 latitude 34.061595 longitude -118.368275 treecount 5.000000 dtype: float64
# sort it
stop_buffer=stop_buffer.sort_values(by='treecount', ascending=False)
stop_buffer.head()
| id | display_name | latitude | longitude | unique_name | geometry | treecount | |
|---|---|---|---|---|---|---|---|
| 93 | 16728 | Wilshire / Kingsley | 34.06186 | -118.30270 | Wilshire / Kingsley (16728) | POLYGON ((-13169296.323 4037111.340, -13169296... | 61 |
| 92 | 08428 | Wilshire / Kingsley | 34.06161 | -118.30278 | Wilshire / Kingsley (08428) | POLYGON ((-13169305.229 4037077.734, -13169305... | 61 |
| 69 | 16713 | Wilshire / Coronado | 34.06070 | -118.28149 | Wilshire / Coronado (16713) | POLYGON ((-13166935.226 4036955.455, -13166935... | 54 |
| 70 | 08414 | Wilshire / Coronado | 34.06047 | -118.28164 | Wilshire / Coronado (08414) | POLYGON ((-13166951.935 4036924.563, -13166952... | 54 |
| 74 | 02449 | Wilshire / Curson | 34.06233 | -118.35461 | Wilshire / Curson (02449) | POLYGON ((-13175074.907 4037174.483, -13175075... | 54 |
stop_buffer.shape
(182, 7)
stop_buffer.sum()
id 1672808428167130841402449167000844916750167440... display_name Wilshire / KingsleyWilshire / KingsleyWilshire... latitude 6197.95 longitude -21543.7 unique_name Wilshire / Kingsley (16728)Wilshire / Kingsley... treecount 1820 dtype: object
# make a stacked bar chart
fig = px.bar(stop_buffer[stop_buffer['treecount']>25],
y='unique_name',
x='treecount',
height=800,
orientation='h'
)
fig.update_layout(yaxis_type='category') # need this to force the id to be categorical
fig.show()
fig.write_html("stacked_bar.html")
# import that interact library
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
shadestops = stop_buffer
alltrees = trees_by_stop.treecount.tolist()
print("Original List : ",alltrees)
my_set = set(alltrees)
my_new_list = list(my_set)
print("List of unique numbers : ",my_new_list)
Original List : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 21, 21, 21, 21, 23, 23, 24, 24, 26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 29, 30, 36, 39, 39, 43, 43, 44, 54, 54, 54, 61, 61] List of unique numbers : [1, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 24, 26, 27, 28, 29, 30, 36, 39, 43, 44, 54, 61]
strees = my_new_list
# table
display(shadestops[shadestops.treecount == 1].head())
# map
ax = shadestops[shadestops.treecount == 1].plot(figsize=(35,35), markersize=1)
# axis
ax.axis('off')
# basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.DarkMatter)
| id | display_name | latitude | longitude | unique_name | geometry | treecount | |
|---|---|---|---|---|---|---|---|
| 173 | 08470 | Wilshire / Whittier | 34.06697 | -118.41480 | Wilshire / Whittier (08470) | POLYGON ((-13181775.227 4037798.010, -13181775... | 1 |
| 124 | 08442 | Wilshire / Peck | 34.06695 | -118.40317 | Wilshire / Peck (08442) | POLYGON ((-13180480.593 4037795.322, -13180481... | 1 |
| 180 | 16780 | Wilshire / Yale | 34.03850 | -118.47463 | Wilshire / Yale (16780) | POLYGON ((-13188435.472 4033972.808, -13188435... | 1 |
| 175 | 08457 | Wilshire / Willaman | 34.06610 | -118.38030 | Wilshire / Willaman (08457) | POLYGON ((-13177934.716 4037681.098, -13177935... | 1 |
| 174 | 16759 | Wilshire / Willaman | 34.06635 | -118.38027 | Wilshire / Willaman (16759) | POLYGON ((-13177931.376 4037714.693, -13177931... | 1 |
def place_by(treecount = 1):
# table
display(shadestops[shadestops.treecount == treecount].head())
# map
ax = shadestops[shadestops.treecount == treecount].plot(figsize=(9,9), markersize=2)
# axis
ax.axis('off')
# basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.DarkMatter)
#place_by(treecount = 65) #error but don't know why
@interact
# create a function
def stops_by(treecount =strees):
display(shadestops[shadestops.treecount == treecount].head())
# map
ax = shadestops[shadestops.treecount == treecount].plot(figsize=(60,40), markersize=2)
# axis
ax.axis('off')
# basemap
ctx.add_basemap(ax,source=ctx.providers.CartoDB.DarkMatter)
fig.write_html("interact_trees.html")
benches=gpd.read_file('../data/Bus_Stop_Benches.shp')
benches.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
#Reproject the data
benches = benches.to_crs('EPSG:3857')
#Spatial join of benches and stops
bjoin=gpd.sjoin(benches, stop_buffer, how='right')
bjoin.shape
(292, 21)
bjoin.head(2)
| index_left | OBJECTID | FID2 | NUMBER | LATITUDE | LONGITUDE | SITEATS | CITY_SITE | FACING | MEDIA_TYPE | ... | AREA | ADVERTISER | TOOLTIP | id | display_name | latitude | longitude | unique_name | geometry | treecount | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 71 | 8.0 | 9.0 | 1542.0 | LA-1543 | 34.06147 | -118.31958 | EB Wilshire FS Dunsmuir -SE | L.A. Central | West | Bus Bench | ... | CD-04 | MOM L.A. | Location: EB Wilshire FS Dunsmuir -SE\nNumb... | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 |
| 71 | 9.0 | 10.0 | 1548.0 | LA-1549 | 34.06147 | -118.31958 | EB Wilshire NS La Brea -SW | L.A. Central | East | Bus Bench | ... | CD-04 | Diamond Foam & Fabrics | Location: EB Wilshire NS La Brea -SW\nNumbe... | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 |
2 rows × 21 columns
# new dataframe for number of benches per stop
benches_by_stop = bjoin.groupby(['id']).size().reset_index(name='benchescount').sort_values(by='benchescount')
benches_by_stop.sample(2)
| id | benchescount | |
|---|---|---|
| 114 | 16680 | 1 |
| 123 | 16695 | 1 |
#Now lets join the benches back to stopsbuffer
stop_buffer=stop_buffer.merge(benches_by_stop,on='id')
stop_buffer.head()
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | |
|---|---|---|---|---|---|---|---|---|
| 0 | 16728 | Wilshire / Kingsley | 34.06186 | -118.30270 | Wilshire / Kingsley (16728) | POLYGON ((-13169296.323 4037111.340, -13169296... | 61 | 1 |
| 1 | 08428 | Wilshire / Kingsley | 34.06161 | -118.30278 | Wilshire / Kingsley (08428) | POLYGON ((-13169305.229 4037077.734, -13169305... | 61 | 1 |
| 2 | 16713 | Wilshire / Coronado | 34.06070 | -118.28149 | Wilshire / Coronado (16713) | POLYGON ((-13166935.226 4036955.455, -13166935... | 54 | 3 |
| 3 | 08414 | Wilshire / Coronado | 34.06047 | -118.28164 | Wilshire / Coronado (08414) | POLYGON ((-13166951.935 4036924.563, -13166952... | 54 | 3 |
| 4 | 02449 | Wilshire / Curson | 34.06233 | -118.35461 | Wilshire / Curson (02449) | POLYGON ((-13175074.907 4037174.483, -13175075... | 54 | 1 |
stop_buffer = stop_buffer.sort_values(by='benchescount',ascending=False)
stop_buffer
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | |
|---|---|---|---|---|---|---|---|---|
| 63 | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 | 23 |
| 35 | 104706 | Wilshire / Bundy | 34.04385 | -118.46756 | Wilshire / Bundy (104706) | POLYGON ((-13187648.444 4034691.531, -13187648... | 18 | 7 |
| 33 | 104702 | Wilshire / Bundy | 34.04408 | -118.46771 | Wilshire / Bundy (104702) | POLYGON ((-13187665.153 4034722.417, -13187665... | 18 | 7 |
| 27 | 08467 | Wilshire / Western | 34.06184 | -118.30874 | Wilshire / Western (08467) | POLYGON ((-13169968.693 4037108.639, -13169969... | 21 | 4 |
| 49 | 16692 | Do Not Announce This Stop! | 34.06309 | -118.36114 | Do Not Announce This Stop! (16692) | POLYGON ((-13175801.835 4037276.607, -13175802... | 14 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 73 | 16751 | Wilshire / Saint Andrews | 34.06157 | -118.31176 | Wilshire / Saint Andrews (16751) | POLYGON ((-13170304.878 4037072.372, -13170305... | 8 | 1 |
| 72 | 16725 | Wilshire / Highland | 34.06226 | -118.33879 | Wilshire / Highland (16725) | POLYGON ((-13173313.833 4037165.076, -13173314... | 8 | 1 |
| 70 | 08418 | Wilshire / Dunsmuir | 34.06220 | -118.34812 | Wilshire / Dunsmuir (08418) | POLYGON ((-13174352.444 4037157.014, -13174352... | 8 | 1 |
| 68 | 08400 | Wilshire / Lorraine | 34.06189 | -118.32032 | Wilshire / Lorraine (08400) | POLYGON ((-13171257.773 4037115.358, -13171258... | 9 | 1 |
| 181 | 08472 | Wilshire / Yale | 34.03832 | -118.47442 | Wilshire / Yale (08472) | POLYGON ((-13188412.095 4033948.627, -13188412... | 1 | 1 |
182 rows × 8 columns
stop_buffer.sum()
id 0092810470610470208467166920179116777084041670... display_name Wilshire / CrenshawWilshire / BundyWilshire / ... latitude 6197.95 longitude -21543.7 unique_name Wilshire / Crenshaw (00928)Wilshire / Bundy (1... treecount 1820 benchescount 292 dtype: object
mean_benchcount = round(stop_buffer['benchescount'].mean(), 1)
mean_benchcount
1.6
median_benchcount=round(stop_buffer['benchescount'].median(), 1)
median_benchcount
1.0
stop_buffer['benchescount'].plot(kind='hist', figsize=(10, 8), linewidth=2, color='slateblue', edgecolor='gray')
plt.xlabel("Number of benches", labelpad=15)
plt.ylabel("Frequency", labelpad=15)
plt.title("Number of Benches within 100 m buffer of bus stops along Route 20", y=1.012, fontsize=20)
measurements = [median_benchcount, mean_benchcount]
names=["median", "mean"]
colors = ['blue', 'orange']
for measurement, name, color in zip(measurements, names, colors):
plt.axvline(x=measurement, linestyle='--', linewidth=2.5, label='{0} at {1}'.format(name, measurement), c=color)
plt.legend();
fig.write_html("bench_stats.html")
#Lets visualize benches per stop buffer
fig = px.bar(stop_buffer[stop_buffer['benchescount']>5],
y='unique_name',
x='benchescount',
height=800,
orientation='h'
)
fig.update_layout(yaxis_type='category') # need this to force the id to be categorical
fig.show()
fig.write_html("bench_buffer.html")
income=gpd.read_file('../data/acs2018_5yr_B19013_14000US06037204600.geojson')
income.tail()
| geoid | name | B19013001 | B19013001, Error | geometry | |
|---|---|---|---|---|---|
| 2342 | 14000US06037980031 | Census Tract 9800.31, Los Angeles, CA | 64500.0 | 44253.0 | MULTIPOLYGON (((-118.29105 33.75378, -118.2905... |
| 2343 | 14000US06037980033 | Census Tract 9800.33, Los Angeles, CA | NaN | NaN | MULTIPOLYGON (((-118.24897 33.75590, -118.2470... |
| 2344 | 14000US06037990100 | Census Tract 9901, Los Angeles, CA | NaN | NaN | MULTIPOLYGON (((-118.95114 33.99643, -118.9505... |
| 2345 | 14000US06037990200 | Census Tract 9902, Los Angeles, CA | NaN | NaN | MULTIPOLYGON (((-118.63598 34.03255, -118.6325... |
| 2346 | 14000US06037990300 | Census Tract 9903, Los Angeles, CA | NaN | NaN | MULTIPOLYGON (((-118.47656 33.75038, -118.4661... |
income.head()
| geoid | name | B19013001 | B19013001, Error | geometry | |
|---|---|---|---|---|---|
| 0 | 05000US06037 | Los Angeles County, CA | 64251.0 | 247.0 | MULTIPOLYGON (((-118.70339 34.16859, -118.7033... |
| 1 | 14000US06037101110 | Census Tract 1011.10, Los Angeles, CA | 53077.0 | 14466.0 | MULTIPOLYGON (((-118.30229 34.25870, -118.3009... |
| 2 | 14000US06037101122 | Census Tract 1011.22, Los Angeles, CA | 88953.0 | 10913.0 | MULTIPOLYGON (((-118.30334 34.27371, -118.3033... |
| 3 | 14000US06037101210 | Census Tract 1012.10, Los Angeles, CA | 32119.0 | 11957.0 | MULTIPOLYGON (((-118.29945 34.25598, -118.2979... |
| 4 | 14000US06037101220 | Census Tract 1012.20, Los Angeles, CA | 41728.0 | 3976.0 | MULTIPOLYGON (((-118.28593 34.25227, -118.2859... |
#notice for some reason we dont have the sum of LA County data that we talked abt in class.
#so no need to remove...
income=income[['geoid','name','geometry','B19013001',]]
income.columns=['geoid','censustracts','geometry','hhincome']
income.tail()
| geoid | censustracts | geometry | hhincome | |
|---|---|---|---|---|
| 2342 | 14000US06037980031 | Census Tract 9800.31, Los Angeles, CA | MULTIPOLYGON (((-118.29105 33.75378, -118.2905... | 64500.0 |
| 2343 | 14000US06037980033 | Census Tract 9800.33, Los Angeles, CA | MULTIPOLYGON (((-118.24897 33.75590, -118.2470... | NaN |
| 2344 | 14000US06037990100 | Census Tract 9901, Los Angeles, CA | MULTIPOLYGON (((-118.95114 33.99643, -118.9505... | NaN |
| 2345 | 14000US06037990200 | Census Tract 9902, Los Angeles, CA | MULTIPOLYGON (((-118.63598 34.03255, -118.6325... | NaN |
| 2346 | 14000US06037990300 | Census Tract 9903, Los Angeles, CA | MULTIPOLYGON (((-118.47656 33.75038, -118.4661... | NaN |
income.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
income = income.to_crs('EPSG:3857')
income["hhincome"].describe()
count 2309.000000 mean 69675.671286 std 33863.952081 min 8264.000000 25% 45000.000000 50% 62917.000000 75% 85950.000000 max 250001.000000 Name: hhincome, dtype: float64
income=income.query("hhincome !='NaN'", engine='python')
income.head()
| geoid | censustracts | geometry | hhincome | |
|---|---|---|---|---|
| 0 | 05000US06037 | Los Angeles County, CA | MULTIPOLYGON (((-13214001.153 4051462.206, -13... | 64251.0 |
| 1 | 14000US06037101110 | Census Tract 1011.10, Los Angeles, CA | MULTIPOLYGON (((-13169350.794 4063591.834, -13... | 53077.0 |
| 2 | 14000US06037101122 | Census Tract 1011.22, Los Angeles, CA | MULTIPOLYGON (((-13169467.902 4065613.937, -13... | 88953.0 |
| 3 | 14000US06037101210 | Census Tract 1012.10, Los Angeles, CA | MULTIPOLYGON (((-13169034.646 4063225.625, -13... | 32119.0 |
| 4 | 14000US06037101220 | Census Tract 1012.20, Los Angeles, CA | MULTIPOLYGON (((-13167528.939 4062726.771, -13... | 41728.0 |
#Lets see if we can do spatial join with income.
ijoin=gpd.sjoin(income, stop_buffer, how='right')
ijoin.shape
(565, 12)
ijoin.sample(5)
| index_left | geoid | censustracts | hhincome | id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 130 | 0 | 05000US06037 | Los Angeles County, CA | 64251.0 | 08377 | Wilshire / 14th | 34.02660 | -118.48890 | Wilshire / 14th (08377) | POLYGON ((-13190024.013 4032374.302, -13190024... | 1 | 1 |
| 25 | 602 | 14000US06037212303 | Census Tract 2123.03, Los Angeles, CA | 34400.0 | 16710 | Wilshire / Catalina | 34.06191 | -118.29523 | Wilshire / Catalina (16710) | POLYGON ((-13168464.767 4037118.046, -13168465... | 23 | 1 |
| 49 | 634 | 14000US06037215101 | Census Tract 2151.01, Los Angeles, CA | 64269.0 | 16692 | Do Not Announce This Stop! | 34.06309 | -118.36114 | Do Not Announce This Stop! (16692) | POLYGON ((-13175801.835 4037276.607, -13175802... | 14 | 4 |
| 98 | 844 | 14000US06037264302 | Census Tract 2643.02, Los Angeles, CA | 95222.0 | 16757 | Wilshire / Westgate | 34.04757 | -118.46346 | Wilshire / Westgate (16757) | POLYGON ((-13187192.034 4035191.305, -13187192... | 3 | 4 |
| 106 | 2117 | 14000US06037700902 | Census Tract 7009.02, Los Angeles, CA | 84347.0 | 08444 | Wilshire / Rexford | 34.06695 | -118.39407 | Wilshire / Rexford (08444) | POLYGON ((-13179467.574 4037795.309, -13179468... | 1 | 1 |
ijoin["hhincome"].describe()
count 565.000000 mean 71421.529204 std 27902.599562 min 10294.000000 25% 64083.000000 50% 64251.000000 75% 87344.000000 max 209890.000000 Name: hhincome, dtype: float64
import matplotlib.pyplot as plt
ax = ijoin.plot(figsize=(15,15),
column='hhincome',
legend=True,
alpha=0.8,
scheme='quantiles')
ax.set_title('Median Household Income along Route 20',fontsize=20)
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
fig.write_html("income_route.html")
import matplotlib.pyplot as plt
ax = stop_buffer.plot(figsize=(15,15),
column='treecount',
legend=True,
alpha=1.0,
cmap='RdYlGn',
scheme='naturalbreaks')
ax.set_title('Number of trees within 100 m buffer of bus stops along Route 20',fontsize=20)
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
fig.write_html("tree_study.html")
ax = stop_buffer.plot(figsize=(15,15),
column='benchescount',
legend=True,
alpha=0.5,
cmap='RdBu',
scheme='naturalbreaks')
ax.set_title('Number of benches within 100 m buffer of bus stops along Route 20',fontsize=20)
ax.axis('off')
ctx.add_basemap(ax,source=ctx.providers.CartoDB.Positron)
fig.write_html("bench_study.html")
stop_buffer.head()
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | |
|---|---|---|---|---|---|---|---|---|
| 63 | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 | 23 |
| 35 | 104706 | Wilshire / Bundy | 34.04385 | -118.46756 | Wilshire / Bundy (104706) | POLYGON ((-13187648.444 4034691.531, -13187648... | 18 | 7 |
| 33 | 104702 | Wilshire / Bundy | 34.04408 | -118.46771 | Wilshire / Bundy (104702) | POLYGON ((-13187665.153 4034722.417, -13187665... | 18 | 7 |
| 27 | 08467 | Wilshire / Western | 34.06184 | -118.30874 | Wilshire / Western (08467) | POLYGON ((-13169968.693 4037108.639, -13169969... | 21 | 4 |
| 49 | 16692 | Do Not Announce This Stop! | 34.06309 | -118.36114 | Do Not Announce This Stop! (16692) | POLYGON ((-13175801.835 4037276.607, -13175802... | 14 | 4 |
df1 = pd.DataFrame(stop_buffer)
dftop5t=df1.nlargest(5,'treecount')
fig=px.bar(dftop5t,
x='unique_name',
y='treecount',
title='Top 5 bus stops with most number of trees',
labels={'unique_name':'Bus Stops','treecount':'Number of trees'}
)
fig.update_traces(marker_line_width=0)
fig.write_html("top_five_trees.html")
dftop5b=df1.nlargest(5,'benchescount')
figb=px.bar(dftop5b,
x='unique_name',
y='benchescount',
title='Top 5 bus stops with most number of benches',
labels={'unique_name':'Bus Stops','benchescount':'Number of benches'}
)
figb.update_traces(marker_line_width=0)
fig.write_html("top_five_benches.html")
dfs5t=df1.nsmallest(5,'treecount')
figst=px.bar(dfs5t,
x='unique_name',
y='treecount',
title='5 Bus stops with least number of trees',
labels={'unique_name':'Bus Stops','treecount':'Number of trees'}
)
figst.update_traces(marker_line_width=0)
fig.write_html("bottom_five_trees.html")
dfs5b=df1.nsmallest(5,'benchescount')
figst=px.bar(dfs5b,
x='unique_name',
y='benchescount',
title='5 Bus stops with least number of benches',
labels={'unique_name':'Bus Stops','benchescount':'Number of benches'}
)
figst.update_traces(marker_line_width=0)
figst.write_html("bottom_five_benches.html")
import numpy as np
# create a list of our conditions
conditions = [
(stop_buffer['treecount'] > 25) & (stop_buffer['benchescount'] > 5),
(stop_buffer['treecount'] > 25),
(stop_buffer['benchescount'] > 5),
(stop_buffer['treecount'] <=25) & (stop_buffer['benchescount']<=5)
]
# create a list of the values we want to assign for each condition
values = ['IdealShade+Bench', 'IdealShade', 'IdealBench', 'Notideal']
# create a new column and use np.select to assign values to it using our lists as arguments
stop_buffer['shadebenchindex'] = np.select(conditions, values)
conditions1 = [
(stop_buffer['treecount'] > 25) & (stop_buffer['benchescount'] > 5),
(stop_buffer['treecount'] > 25),
(stop_buffer['benchescount'] > 5),
(stop_buffer['treecount'] <=25) & (stop_buffer['benchescount']<=5)
]
# create a list of the values we want to assign for each condition
values1 = ['A - both', 'C - shade', 'B - bench', 'D - none']
stop_buffer['indexed'] = np.select(conditions1, values1)
# display updated DataFrame
stop_buffer.head()
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | shadebenchindex | indexed | |
|---|---|---|---|---|---|---|---|---|---|---|
| 63 | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 | 23 | IdealBench | B - bench |
| 35 | 104706 | Wilshire / Bundy | 34.04385 | -118.46756 | Wilshire / Bundy (104706) | POLYGON ((-13187648.444 4034691.531, -13187648... | 18 | 7 | IdealBench | B - bench |
| 33 | 104702 | Wilshire / Bundy | 34.04408 | -118.46771 | Wilshire / Bundy (104702) | POLYGON ((-13187665.153 4034722.417, -13187665... | 18 | 7 | IdealBench | B - bench |
| 27 | 08467 | Wilshire / Western | 34.06184 | -118.30874 | Wilshire / Western (08467) | POLYGON ((-13169968.693 4037108.639, -13169969... | 21 | 4 | Notideal | D - none |
| 49 | 16692 | Do Not Announce This Stop! | 34.06309 | -118.36114 | Do Not Announce This Stop! (16692) | POLYGON ((-13175801.835 4037276.607, -13175802... | 14 | 4 | Notideal | D - none |
stop_buffer.tail()
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | shadebenchindex | indexed | |
|---|---|---|---|---|---|---|---|---|---|---|
| 73 | 16751 | Wilshire / Saint Andrews | 34.06157 | -118.31176 | Wilshire / Saint Andrews (16751) | POLYGON ((-13170304.878 4037072.372, -13170305... | 8 | 1 | Notideal | D - none |
| 72 | 16725 | Wilshire / Highland | 34.06226 | -118.33879 | Wilshire / Highland (16725) | POLYGON ((-13173313.833 4037165.076, -13173314... | 8 | 1 | Notideal | D - none |
| 70 | 08418 | Wilshire / Dunsmuir | 34.06220 | -118.34812 | Wilshire / Dunsmuir (08418) | POLYGON ((-13174352.444 4037157.014, -13174352... | 8 | 1 | Notideal | D - none |
| 68 | 08400 | Wilshire / Lorraine | 34.06189 | -118.32032 | Wilshire / Lorraine (08400) | POLYGON ((-13171257.773 4037115.358, -13171258... | 9 | 1 | Notideal | D - none |
| 181 | 08472 | Wilshire / Yale | 34.03832 | -118.47442 | Wilshire / Yale (08472) | POLYGON ((-13188412.095 4033948.627, -13188412... | 1 | 1 | Notideal | D - none |
stop_buffer['indexed'].value_counts()
D - none 156 C - shade 23 B - bench 3 Name: indexed, dtype: int64
stop_buffer['indexed'].value_counts().plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7f07a295dc70>
#So none of these stops have ideal shade and bench, based on our definitions..?I dont know if that makes sense.
#Remaining tasks
#Does a stop have bus or bench or both (kind of done or have a first version)
#Function by number of trees or by distance
#Making spatial joined maps prettier
fig = stop_buffer.plot(figsize=(15,15),
column='indexed',
legend=True,
alpha=0.5,
cmap='rainbow')
fig.set_title('Shade and Rest Infrastructure at Bus Stops Route 20',fontsize=20)
fig.axis('off')
ctx.add_basemap(fig,source=ctx.providers.CartoDB.Positron)
fig.write_html("grade_stops.html")
#DOES NOT WORK
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-141-abbc45d886bd> in <module> ----> 1 fig.write_html("grade_stops.html") AttributeError: 'AxesSubplot' object has no attribute 'write_html'
latitude = stop_buffer.latitude.mean()
latitude
longitude = stop_buffer.longitude.mean()
longitude
-118.37209012527475
import folium
m = folium.Map(location=[latitude,longitude], tiles='Stamen Terrain', zoom_start=10)
m
for index, row in stop_buffer.iterrows():
folium.Marker(
[row.latitude,row.longitude],
popup=row.indexed, #what pops up when you click
tooltip=row.unique_name #when you hover on the icon
).add_to(m)
stop_buffer['color'] = ''
stop_buffer.head()
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | shadebenchindex | indexed | color | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 63 | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 | 23 | IdealBench | B - bench | |
| 35 | 104706 | Wilshire / Bundy | 34.04385 | -118.46756 | Wilshire / Bundy (104706) | POLYGON ((-13187648.444 4034691.531, -13187648... | 18 | 7 | IdealBench | B - bench | |
| 33 | 104702 | Wilshire / Bundy | 34.04408 | -118.46771 | Wilshire / Bundy (104702) | POLYGON ((-13187665.153 4034722.417, -13187665... | 18 | 7 | IdealBench | B - bench | |
| 27 | 08467 | Wilshire / Western | 34.06184 | -118.30874 | Wilshire / Western (08467) | POLYGON ((-13169968.693 4037108.639, -13169969... | 21 | 4 | Notideal | D - none | |
| 49 | 16692 | Do Not Announce This Stop! | 34.06309 | -118.36114 | Do Not Announce This Stop! (16692) | POLYGON ((-13175801.835 4037276.607, -13175802... | 14 | 4 | Notideal | D - none |
stop_buffer.loc[stop_buffer['indexed'] == 'B - bench']
| id | display_name | latitude | longitude | unique_name | geometry | treecount | benchescount | shadebenchindex | indexed | color | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 63 | 00928 | Wilshire / Crenshaw | 34.06162 | -118.31957 | Wilshire / Crenshaw (00928) | POLYGON ((-13171174.283 4037079.077, -13171174... | 10 | 23 | IdealBench | B - bench | |
| 35 | 104706 | Wilshire / Bundy | 34.04385 | -118.46756 | Wilshire / Bundy (104706) | POLYGON ((-13187648.444 4034691.531, -13187648... | 18 | 7 | IdealBench | B - bench | |
| 33 | 104702 | Wilshire / Bundy | 34.04408 | -118.46771 | Wilshire / Bundy (104702) | POLYGON ((-13187665.153 4034722.417, -13187665... | 18 | 7 | IdealBench | B - bench |
stop_buffer.loc[stop_buffer['indexed'] == 'A - both', 'color'] = 'purple'
stop_buffer.loc[stop_buffer['indexed'] == 'B - bench', 'color'] = 'blue'
stop_buffer.loc[stop_buffer['indexed'] == 'C - shade', 'color'] = 'green'
stop_buffer.loc[stop_buffer['indexed'] == 'D - none', 'color'] = 'darkred'
import folium
m = folium.Map(location=[latitude,longitude], tiles='Stamen Terrain', zoom_start=12)
m
#add the stations
for index, row in stop_buffer.iterrows():
folium.Marker(
[row.latitude,row.longitude],
popup=row.indexed, #what pops up when you click
tooltip=row.unique_name, #when you hover on the icon
icon=folium.Icon(color=row.color)
).add_to(m)
m
m.save('route20.html')